This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(readxl)
library(lmtest)
Loading required package: zoo
Attaching package: ‘zoo’
The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
library(forecast)
Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
This is forecast 8.21.1
Want to meet other forecasters? Join the International Institute of Forecasters:
http://forecasters.org/
library(DIMORA)
Loading required package: minpack.lm
Loading required package: numDeriv
Loading required package: reshape2
Loading required package: deSolve
library(fpp2)
── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── fpp2 2.5 ──
✔ ggplot2 3.4.4 ✔ expsmooth 2.3
✔ fma 2.5
library(ggplot2)
library(plotly)
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(tseries)
‘tseries’ version: 0.10-55
‘tseries’ is a package for time series analysis and computational finance.
See ‘library(help="tseries")’ for details.
library(randomForest)
randomForest 4.7-1.1
Type rfNews() to see new features/changes/bug fixes.
Attaching package: ‘randomForest’
The following object is masked from ‘package:ggplot2’:
margin
library(BASS)
library(Metrics)
Attaching package: ‘Metrics’
The following object is masked from ‘package:forecast’:
accuracy
library(dplyr)
Attaching package: ‘dplyr’
The following object is masked from ‘package:randomForest’:
combine
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(tibble)
# folder_path = "~/Desktop/unipd/time-series/project/data/"
folder_path = "./data/"
# shares_file <- paste0(folder_path, "us-total-share-prices.csv")
# m2_file <- paste0(folder_path, "us-M2.csv")
# usir_file <- paste0(folder_path, "us-interest-rate.csv")
euir_file <- paste0(folder_path, "eu-interest-rate.csv")
# uscpi_file <- paste0(folder_path, "us-consumer-price-index.csv")
eucpi_file <- paste0(folder_path, "eu-consumer-price-index.csv")
rate_file <- paste0(folder_path, "euro-daily-hist_1999_2022.csv")
# shares<- read.csv(shares_file)
# usm2 <- read.csv(m2_file)
# usir <- read.csv(usir_file)
euir <- read.csv(euir_file)
# uscpi <- read.csv(uscpi_file)
eucpi <- read.csv(eucpi_file)
rate <- read.csv(rate_file)
# ### Looking at share price
# shares$DATE <- as.Date(shares$DATE)
#
# # plot(shares$DATE, shares$SPASTT01USM661N, xlab = "Time", ylab = "Price")
# gg <- ggplot(shares, aes(x = DATE, y = SPASTT01USM661N))+
# geom_line(aes(group = 1), color = "blue", linewidth = 0.5) +
# geom_point() +
# labs(
# title = "Total Share Prices Change with Time",
# x = "Time",
# y = "Index 2015 = 100"
# ) +
# scale_x_date(date_labels = "%Y-%m", date_breaks = "1 year") +
# theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
#
# plotly_gg <- ggplotly(gg)
#
# plotly_gg
### There is a clear increasing trend that is confirmed by the ACF being the highest
### at the smaller lags
# ggAcf(shares$SPASTT01USM661N, lag = 760)
### Attempting modeling share prices with Naive Method
# nmshares_model <- meanf(shares$SPASTT01USM661N, h = 1)
# plot(nmshares_model)
## Checking residuals
# nmshares_res <- residuals(nmshares_model)
# autoplot(ts(nmshares_res)) + xlab("Month") + ylab("") +
# ggtitle("Residuals from Naive Method")
# gg_hist <- gghistogram(nmshares_res) + ggtitle("Histogram of Naive Method Residuals")
# ggplotly(gg_hist)
# ggAcf(nmshares_res, lag = 760)
### Attempting modeling share prices with linear regression model
# shares_lm = lm(shares$SPASTT01USM661N~ shares$DATE)
# summary(shares_lm) ###R^2 value of 83.96%
# tt <- 1:NROW(shares)
# plot(tt, shares$SPASTT01USM661N, xlab="Time", ylab="Index 2015 = 100", type = "p")
# abline(shares_lm)
head(euir)
euir$DATE <- as.Date(euir$DATE)
euir_df <- data.frame(date = euir$DATE, interest_rate = euir$IRSTCI01EZM156N)
summary(euir_df$interest_rate)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.5847 -0.0506 2.0518 1.9590 3.7900 6.8400
ggplot(euir_df, aes(x = date, y = interest_rate)) +
geom_line() +
labs(title = "Euro Interest Rate Over Time",
x = "Date", y = "Interest Rate")
# Check for Stationarity (Augmented Dickey-Fuller test)
adf.test(euir_df$interest_rate)
Augmented Dickey-Fuller Test
data: euir_df$interest_rate
Dickey-Fuller = -2.0654, Lag order = 7, p-value = 0.5493
alternative hypothesis: stationary
# Autocorrelation and Partial Autocorrelation Plots
ggtsdisplay(euir_df$interest_rate, lag = 900)
ggAcf(euir_df$interest_rate, lag.max = 700)
Linear regression model
train_data <- euir_df[1:(nrow(euir_df) - 3), ]
test_data <- euir_df[(nrow(euir_df) - 2):nrow(euir_df), ]
model <- lm(interest_rate ~ date, data = train_data)
summary(model)
Call:
lm(formula = interest_rate ~ date, data = train_data)
Residuals:
Min 1Q Median 3Q Max
-1.4724 -0.6525 -0.3006 0.3833 4.4335
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.030e+01 2.503e-01 41.16 <2e-16 ***
date -5.909e-04 1.729e-05 -34.18 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.012 on 352 degrees of freedom
Multiple R-squared: 0.7685, Adjusted R-squared: 0.7678
F-statistic: 1168 on 1 and 352 DF, p-value: < 2.2e-16
predictions <- predict(model, newdata = test_data)
residuals <- residuals(model)
residuals_test <- residuals(model)[1:nrow(test_data)]
rmse_value <- rmse(predictions, test_data$interest_rate)
mae_value <- mae(predictions, test_data$interest_rate)
cat("RMSE:", rmse_value, "\n")
RMSE: 4.842428
cat("MAE:", mae_value, "\n")
MAE: 4.840305
plot(test_data$date, test_data$interest_rate, col = "blue", type = "l", xlab = "Date", ylab = "Interest Rate")
lines(test_data$date, predictions, col = "red")
legend("topleft", legend = c("Actual", "Predicted"), col = c("blue", "red"), lty = 1)
# Residuals plot
plot(test_data$date, residuals_test, col = "green", type = "l", xlab = "Date", ylab = "Residuals")
plot(train_data$date, residuals, col = "green", type = "l", xlab = "Date", ylab = "Residuals train")
result_table <- data.frame(
Date = test_data$date,
Actual = test_data$interest_rate,
Predicted = predictions
)
print(result_table)
dwtest(model)
Durbin-Watson test
data: model
DW = 0.029354, p-value < 2.2e-16
alternative hypothesis: true autocorrelation is greater than 0
| ARIMA |
| ```r train_data <- euir_df[1:(nrow(euir_df) - 3), ] test_data <- euir_df[(nrow(euir_df) - 2):nrow(euir_df), ] |
| arima_model <- auto.arima(train_data$interest_rate) |
| summary(arima_model) ``` |
| ``` Series: train_data$interest_rate ARIMA(0,2,1) |
| Coefficients: ma1 -0.7618 s.e. 0.0400 |
| sigma^2 = 0.02614: log likelihood = 142 AIC=-279.99 AICc=-279.96 BIC=-272.26 |
| Training set error measures: ME RMSE MAE MPE MAPE MASE ACF1 Training set 0.004576847 0.1609926 0.09655125 -0.5811529 11.74419 0.9702804 -0.03495108 ``` |
r # Forecasting for the next 3 months arima_forecast <- forecast(arima_model, h = 3) print(arima_forecast) |
r arima_pred_values <- arima_forecast$mean |
r # Plotting ARIMA Forecast plot(arima_forecast, xlab = "Date", ylab = "Interest Rate Forecast") |
r # ARIMA residuals arima_residuals <- residuals(arima_model) plot(train_data$date, arima_residuals, col = "green", type = "l", xlab = "Date", ylab = "ARIMA Residuals") |
r forecasted_values <- arima_forecast$mean actual_values <- as.numeric(test_data$interest_rate) arima_residuals_test <- actual_values - forecasted_values |
| ```r #ARIMA forecasted values and residuals arima_table <- data.frame( Date = index(test_data), Actual_Values = actual_values, Forecasted_Interest_Rate = forecasted_values, Residuals = arima_residuals_test ) |
| # Print the ARIMA forecast and residuals table print(arima_table) ``` |
r NA |
r # Plotting ARIMA predicted values and residuals for the test data plot(test_data$date, arima_pred_values, col = "blue", type = "l", xlab = "Date", ylab = "ARIMA Predicted Values") lines(test_data$date, arima_residuals_test, col = "green") |
| ```r legend(“topright”, legend = c(“Predicted Values”, “Residuals”), col = c(“blue”, “green”), lty = 1) |
| ``` |
head(eucpi)
eucpi$DATE <- as.Date(eucpi$DATE)
eucpi_df <- data.frame(date = eucpi$DATE, cp_index = eucpi$EA19CPALTT01GYM)
summary(eucpi_df$cp_index)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.60 1.30 2.10 2.19 2.60 10.60
ggplot(eucpi_df, aes(x = date, y = cp_index)) +
geom_line() +
labs(title = "Euro CP Index Over Time",
x = "Date", y = "Index Rate")
# Check for Stationarity (Augmented Dickey-Fuller test)
adf.test(eucpi_df$cp_index)
Augmented Dickey-Fuller Test
data: eucpi_df$cp_index
Dickey-Fuller = -3.6941, Lag order = 7, p-value = 0.02453
alternative hypothesis: stationary
# Autocorrelation and Partial Autocorrelation Plots
ggtsdisplay(eucpi_df$cp_index, lag = 900)
ggAcf(eucpi_df$cp_index, lag.max = 700)
LINEAR REGRESSION
train_data <- eucpi_df[1:(nrow(eucpi_df) - 3), ]
test_data <- eucpi_df[(nrow(eucpi_df) - 2):nrow(eucpi_df), ]
model <- lm(cp_index ~ date, data = train_data)
summary(model)
Call:
lm(formula = cp_index ~ date, data = train_data)
Residuals:
Min 1Q Median 3Q Max
-2.6457 -0.7591 -0.1226 0.3644 8.9977
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.368e+00 3.097e-01 10.875 < 2e-16 ***
date -9.163e-05 2.231e-05 -4.107 4.91e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.464 on 380 degrees of freedom
Multiple R-squared: 0.0425, Adjusted R-squared: 0.03998
F-statistic: 16.87 on 1 and 380 DF, p-value: 4.912e-05
predictions <- predict(model, newdata = test_data)
residuals <- residuals(model)
residuals_test <- residuals(model)[1:nrow(test_data)]
rmse_value <- rmse(predictions, test_data$cp_index)
mae_value <- mae(predictions, test_data$cp_index)
cat("RMSE:", rmse_value, "\n")
RMSE: 7.75817
cat("MAE:", mae_value, "\n")
MAE: 7.736682
plot(test_data$date, test_data$cp_index, col = "blue", type = "l", xlab = "Date", ylab = "EU Consumer Price INdex")
lines(test_data$date, predictions, col = "red")
legend("topleft", legend = c("Actual", "Predicted"), col = c("blue", "red"), lty = 1)
# Residuals plot
plot(test_data$date, residuals_test, col = "green", type = "l", xlab = "Date", ylab = "Residuals test")
plot(train_data$date, residuals, col = "green", type = "l", xlab = "Date", ylab = "Residuals train")
result_table <- data.frame(
Date = test_data$date,
Actual = test_data$cp_index,
Predicted = predictions
)
print(result_table)
dwtest(model)
Durbin-Watson test
data: model
DW = 0.036438, p-value < 2.2e-16
alternative hypothesis: true autocorrelation is greater than 0
ARIMA (CP Index)
train_data <- eucpi_df[1:(nrow(eucpi_df) - 3), ]
test_data <- eucpi_df[(nrow(eucpi_df) - 2):nrow(eucpi_df), ]
arima_model <- auto.arima(train_data$cp_index)
summary(arima_model)
Series: train_data$cp_index
ARIMA(1,2,1)
Coefficients:
ar1 ma1
0.0681 -0.8891
s.e. 0.0620 0.0339
sigma^2 = 0.07102: log likelihood = -36.41
AIC=78.82 AICc=78.89 BIC=90.64
Training set error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.00887859 0.2651023 0.1945961 Inf Inf 0.9980657 -0.002914337
# Forecasting for the next 3 months
arima_forecast <- forecast(arima_model, h = 3)
arima_pred_values <- arima_forecast$mean
print(arima_forecast)
# Plotting ARIMA Forecast
plot(arima_forecast, xlab = "Date", ylab = "CP Index Forecast")
# ARIMA residuals
arima_residuals <- residuals(arima_model)
plot(train_data$date, arima_residuals, col = "green", type = "l", xlab = "Date", ylab = "ARIMA Residuals")
forecasted_values <- arima_forecast$mean
actual_values <- as.numeric(test_data$cp_index)
arima_residuals_test <- actual_values - forecasted_values
#ARIMA forecasted values and residuals
arima_table <- data.frame(
Date = index(test_data),
Actual_Values = actual_values,
Forecasted_CP_Index = forecasted_values,
Residuals = arima_residuals_test
)
# Print the ARIMA forecast and residuals table
print(arima_table)
NA
# Plotting ARIMA predicted values and residuals for the test data
plot(test_data$date, arima_pred_values, col = "blue", type = "l", xlab = "Date", ylab = "ARIMA Predicted Values")
lines(test_data$date, arima_residuals_test, col = "green")
legend("topright", legend = c("Predicted Values", "Residuals"), col = c("blue", "green"), lty = 1)
US RATE
rate$Period.Unit. <- as.Date(rate$Period.Unit.)
rate_df <- data.frame(Date = rate$Period.Unit., US_rate = rate$X.US.dollar..)
rate_df <- rate_df %>%
mutate(Date = format(Date, "%Y-%m"))
head(rate_df)
filtered_data <- rate_df %>%
group_by(Date) %>%
summarize(mean_rate = mean(as.numeric(US_rate)))
Warning: There were 42 warnings in `summarize()`.
The first warning was:
ℹ In argument: `mean_rate = mean(as.numeric(US_rate))`.
ℹ In group 12: `Date = "1999-12"`.
Caused by warning in `mean()`:
! NAs introduced by coercion
ℹ Run ]8;;ide:run:dplyr::last_dplyr_warnings()dplyr::last_dplyr_warnings()]8;; to see the 41 remaining warnings.
filtered_data$Date <- paste0(filtered_data$Date, '-01')
filtered_data$Date <- as.Date(filtered_data$Date)
head(filtered_data)
summary(filtered_data$mean_rate)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.8532 1.0982 1.1786 1.1900 1.2987 1.5770 42
ggplot(filtered_data, aes(x = Date, y = mean_rate)) +
geom_line() +
labs(
title = "US Rate Over Time",
x = "Time",
y = "US Rate"
)
gg <- ggplot(filtered_data, aes(x = Date, y = mean_rate))+
geom_line(aes(group = 1), color = "blue", linewidth = 0.5) +
geom_point() +
labs(
title = "US Rate",
x = "Time",
y = "Rate"
) +
scale_x_date(date_labels = "%Y-%m", date_breaks = "1 year") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
plotly_gg <- ggplotly(gg)
plotly_gg
str(euir_df)
'data.frame': 357 obs. of 2 variables:
$ date : Date, format: "1994-01-01" "1994-02-01" "1994-03-01" "1994-04-01" ...
$ interest_rate: num 6.84 6.73 6.68 6.22 5.85 6.32 6.27 5.94 6 6.13 ...
str(filtered_data)
tibble [293 × 2] (S3: tbl_df/tbl/data.frame)
$ Date : Date[1:293], format: "1999-01-01" "1999-02-01" "1999-03-01" "1999-04-01" ...
$ mean_rate: num [1:293] 1.16 1.12 1.09 1.07 1.06 ...
merged_df <- merge(merge(euir_df, eucpi_df, by = "date"), filtered_data, by.x = "date", by.y = "Date")
names(merged_df) <- c("date", "eu_ir", "eu_cpi", "us_rate")
head(merged_df)
tail(merged_df)
ARIMAX (US rate + EU interest rate)
merged_df$date <- as.Date(merged_df$date)
us_ts <- ts(merged_df$us_rate, frequency = 12, start = c(1999, 1))
# Split the data into train and test sets
train_data <- window(us_ts, start = c(1999, 1), end = c(2022, 10))
test_data <- window(us_ts, start = c(2022, 11))
external_regressor <- merged_df$eu_ir[merged_df$date >= as.Date("1999-01-01") & merged_df$date <= as.Date("2022-10-01")]
fit <- Arima(train_data, order = c(1, 0, 1), xreg = external_regressor)
forecast_values <- forecast(fit, xreg = merged_df$eu_ir[merged_df$date >= as.Date("2022-11-01")], h = 3)
predicted_values <- forecast_values$mean
actual_values <- window(us_ts, start = c(2022, 11))
result_table <- tibble(
Date = time(actual_values),
Actual_Values = actual_values,
Predicted_Values = predicted_values
)
plot(forecast_values, main = "ARIMAX Forecast for US Rate")
lines(actual_values, col = "blue")
legend("topleft", legend = c("Actual", "Forecast"), col = c("blue", "black"), lty = 1)
print(result_table)
ARIMAX (US rate + EU Consumer Price Index)
# Split the data into train and test sets
train_data <- window(us_ts, start = c(1999, 1), end = c(2022, 10))
test_data <- window(us_ts, start = c(2022, 11))
external_regressor <- merged_df$eu_cpi[merged_df$date >= as.Date("1999-01-01") & merged_df$date <= as.Date("2022-10-01")]
fit <- Arima(train_data, order = c(1, 0, 1), xreg = external_regressor)
forecast_values <- forecast(fit, xreg = merged_df$eu_cpi[merged_df$date >= as.Date("2022-11-01")], h = 3)
predicted_values <- forecast_values$mean
actual_values <- window(us_ts, start = c(2022, 11))
result_table <- tibble(
Date = time(actual_values),
Actual_Values = actual_values,
Predicted_Values = predicted_values
)
plot(forecast_values, main = "ARIMAX Forecast for US Rate")
lines(actual_values, col = "blue")
legend("topleft", legend = c("Actual", "Forecast"), col = c("blue", "black"), lty = 1)
print(result_table)